Final Project Code and Shiny App: Tour de France

Author

Tristan Hamilton

The shiny app can be found below. The subsequent code is for wrangling, and experimenting with visualizations before implementing them into the app.

The written report for this project was done in a separate file.

Data: https://github.com/rfordatascience/tidytuesday/blob/master/data/2020/2020-04-07/readme.md

## DATA
library(tdf)
library(tidyverse)
library(lubridate)
library(plotly)
library(readr)

tdf_winners <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-04-07/tdf_winners.csv')

tuesdata <- tidytuesdayR::tt_load('2020-04-07')

    Downloading file 1 of 3: `stage_data.csv`
    Downloading file 2 of 3: `tdf_stages.csv`
    Downloading file 3 of 3: `tdf_winners.csv`
tuesdata <- tidytuesdayR::tt_load(2020, week = 15)

    Downloading file 1 of 3: `stage_data.csv`
    Downloading file 2 of 3: `tdf_stages.csv`
    Downloading file 3 of 3: `tdf_winners.csv`
winners <- tdf::editions %>% 
  select(-stage_results)

all_years <- tdf::editions %>% 
  unnest_longer(stage_results) %>% 
  mutate(stage_results = map(stage_results, ~ mutate(.x, rank = as.character(rank)))) %>% 
  unnest_longer(stage_results) 

stage_all <- all_years %>% 
  select(stage_results) %>% 
  flatten_df()

combo_df <- bind_cols(all_years, stage_all) %>% 
  select(-stage_results)

stage_clean <- combo_df %>% 
  select(edition, start_date,stage_results_id:last_col()) %>% 
  mutate(year = lubridate::year(start_date)) %>% 
  rename(age = age...25) %>% 
  select(edition, year, everything(), -start_date)

tdf_winners <- tuesdata$tdf_winners

Pre-shiny wrangling:

winners <- winners |>
  mutate(year = year(start_date))

winners <- winners |>
  mutate(year_born = year(born)) |>
  mutate(year_died = year(died)) |>
  mutate(lifespan = year_died - year_born)

winners_age <- winners |> group_by(edition, year, winner_name, winner_team) |>
  summarise(winner_age = mean(age))
`summarise()` has grouped output by 'edition', 'year', 'winner_name'. You can
override using the `.groups` argument.
winners_wins <- winners |> group_by(edition, year, winner_name, winner_team) |>
  summarise(stage_wins = sum(stage_wins))
`summarise()` has grouped output by 'edition', 'year', 'winner_name'. You can
override using the `.groups` argument.
winners <- winners |> mutate(age_range = case_when(age <= 23 ~ 
                                                           "Young (< 23)",
                                                         age > 23 & age <= 32 ~ 
                                                           "Average (24-32)",
                                                         age > 32 ~ 
                                                           "Old (> 32)"))
age_bracket_count <- winners |> group_by(age_range) |>
  summarise(num = n())


overall_age <- mean(winners$age)
overall_age2 <- median(winners$age)

winners_df2 <- winners_age |> left_join(winners_wins, by = c("edition", "year", "winner_name", "winner_team"))

winners_df3 <- winners |>
  select(winner_name, distance, time_overall, time_margin)

top_riders <- winners_df2 |> group_by(winner_name) |>
  summarise(tdf_wins = n()) |>
  arrange(desc(tdf_wins)) |>
  slice(1:6)

winners_plot <- ggplot(data = winners_df2, aes(x = year, y = winner_age, label = winner_name, label2 = stage_wins)) +
  geom_line(colour = "deepskyblue1") +
  geom_point(size = 0.75) +
  geom_hline(yintercept = overall_age, colour = "red", linewidth = 0.5) +
  # geom_hline(yintercept = overall_age2, colour = "darkorange", linewidth = 0.5) +
  theme_classic() +
  labs(title = "Ages of Tour de France Winners",
       x = "Year",
       y = "Age of Winner") 

ggplotly(winners_plot)
# age stuff
average_age <- stage_clean |> group_by(year) |>
  summarise(mean_year_age = mean(age))

stage_clean2000 <- stage_clean |>
  filter(year >= 2000)

average_age2000 <- stage_clean2000 |> group_by(team, year) |>
  summarise(avg_age = mean(age))
`summarise()` has grouped output by 'team'. You can override using the
`.groups` argument.
winners2000 <- winners |>
  filter(year >= 2000)

winner_join <- inner_join(winners2000, stage_clean2000)
Joining with `by = join_by(edition, age, year)`
team_age <- winner_join |> group_by(winner_team) |>
  summarise(team_mean_age = mean(age))

winner_age <- winners2000 |> group_by(winner_name, winner_team) |>
  summarise(winner_mean_age = mean(age))
`summarise()` has grouped output by 'winner_name'. You can override using the
`.groups` argument.
age_joined <- winner_age |> left_join(team_age, by = c("winner_team"))

## age comparison
age_dc <- age_joined |> 
  filter(winner_team == "Discovery Channel")


ggplot(data = age_dc, aes(x = reorder(winner_team, team_mean_age), 
                              y = team_mean_age)) +
  geom_point(colour = "red3", size = 6) +
  geom_point(aes(x = winner_name, y = winner_mean_age), 
             colour = "yellow3", size = 6) +
  labs(x = "Name",
       y = "Age") +
  theme_classic() +
  ylim(20, 35)

## country wins
country_wins <- winners |> group_by(nationality) |>
  summarise(country_wins = n()) |>
  arrange(desc(country_wins)) |>
  slice(1:7)

plot_test <- ggplot(data = country_wins, aes(x = reorder(nationality, country_wins), 
                                             y = country_wins, 
                                             text = paste("TDF Wins: ", country_wins))) +
  geom_col(fill = "darkorange2", colour = "black") +
  coord_flip() +
  theme_minimal() +
  labs(x = "Country",
       y = "Number of TDF Wins")

ggplotly(plot_test, tooltip = "text")
## country wins 2
country_wins <- winners |> group_by(nationality) |>
  summarise(country_wins = n()) |>
  arrange(desc(country_wins)) 

plot_test2 <- ggplot(data = country_wins, aes(x = reorder(nationality, country_wins), 
                                              y = country_wins, 
                                              text = paste("TDF Wins: ", country_wins))) +
  geom_col(fill = "deepskyblue3", colour = "black") +
  coord_flip() +
  theme_minimal() +
  labs(x = "Country",
       y = "Number of TDF Wins")

ggplotly(plot_test2, tooltip = "text")
## top riders
rider_wins <- winners |> group_by(winner_name) |>
  summarise(num_wins = n()) |>
  arrange(desc(num_wins)) |>
  slice(1:9)

winners_year75 <- winners |>
  filter(year >= 1975)

winners_year00 <- winners |> 
  filter(year < 1975)

plot_test3 <- ggplot(data = rider_wins, aes(x = reorder(winner_name, num_wins), 
                                             y = num_wins, 
                                             text = paste("TDF Wins: ", num_wins))) +
  geom_col(fill = "deepskyblue4", colour = "black") +
  coord_flip() +
  theme_minimal() +
  labs(x = "Rider Name",
       y = "Number of TDF Wins")

ggplotly(plot_test3, tooltip = "text")
# team wins
team_wins <- winners |> group_by(winner_team) |>
  summarise(team_wins = n()) |>
  arrange(desc(team_wins)) |>
  slice(1:11)

SHINY APP:

library(shinythemes)
library(shiny)
library(ggplot2)
library(plotly)
library(knitr)

rider_choices <- winners_df2 |> distinct(winner_name) |> pull(winner_name)
team_choices <- winner_join |> distinct(winner_team) |> pull(winner_team)
ui <- fluidPage(
  theme = shinytheme("united"),
  tabsetPanel(
    tabPanel("Age of Winners",
             mainPanel(
               selectInput("rider_sel",
                           label = "Choose a Tour de France Winner: (chronological)",
                           choices = rider_choices),
               helpText("Each data point is a Tour de France winner. The horizontal 
                        line represents the mean winner age across all editions, 
                        27.7 years old"),
               helpText("Note: the red-highlighted point(s) display the winning year(s) 
               and respective age of the user-selected champion."),
               plotOutput("winners_plot2"),
               tableOutput("data_table_react"),
             )),
    tabPanel("Age Comparison",
             selectInput("team_sel",
                         label = "Choose a winning team (2000-2019):",
                         choices = team_choices),
                            helpText("The selected team is displayed with its corresponding
                                     Tour de France winner(s)."),
             helpText("Note: Team Age is calculated as the cumulative average of each team's 
                                     mean rider age from 2000 to 2019."),
             mainPanel(
               plotlyOutput("age_plot"),
               tableOutput("team_age_react"),
             )
    ),
    tabPanel("Top Riders",
             radioButtons("output_sel",
                          label = "Choose Display:",
                          choices = c("All-time", "1975 - present", "First Edition - 1974"),
                          selected = "All-time"),
             mainPanel(
               plotlyOutput("rider_plot"),
             )
    ),
    tabPanel("Top Countries",
             radioButtons("output_select",
                          label = "Choose Display:",
                          choices = c("Top-Performing Countries", "All Countries"),
                          selected = "Top-Performing Countries"),
             mainPanel(
               plotlyOutput("country_plot"),
             )
    ),
    tabPanel("Top Teams",
             mainPanel(
               plotlyOutput("team_plot"),
             ))
  ),
)

server <- function(input, output, session) {
  
  winner_reactive <- reactive({
    winners_df2 |> filter(winner_name == input$rider_sel)
  })
  
  output$winners_plot2 <- renderPlot({
    ggplot(data = winners_df2, aes(x = year, y = winner_age)) +
      geom_line(colour = "deepskyblue1") +
      geom_point(size = 0.75) +
      geom_point(data = winner_reactive(), aes(x = year, y = winner_age), 
                 colour = "red", size = 4) +
      geom_hline(yintercept = overall_age, colour = "darkorange", linewidth = 0.5) +
      theme_classic(base_size = 18) +
      labs(title = "Age: Tour de France Winners",
           x = "Year",
           y = "Age (years)")
  })
  output$data_table_react <- renderTable({
    winner_reactive() 
  })
  
  ## rider win output 
  output$rider_plot <- renderPlotly({
    if (input$output_sel == "All-time") {
      rider_wins <- winners |> group_by(winner_name) |>
        summarise(num_wins = n()) |>
        arrange(desc(num_wins)) |>
        slice(1:9)
      
      ggplotly(
        ggplot(data = rider_wins, aes(x = reorder(winner_name, num_wins), 
                                      y = num_wins, 
                                      text = paste("TDF Wins: ", num_wins))) +
          geom_col(fill = "royalblue1", colour = "black") +
          coord_flip() +
          theme_minimal() +
          labs(title = "Most Wins (All-time)",
               x = "Rider Name",
               y = "Number of TDF Wins"),
        tooltip = "text"
      )
    }
    else if (input$output_sel == "First Edition - 1974") {
      rider_wins3 <- winners_year00 |> group_by(winner_name) |>
        summarise(num_wins = n()) |>
        arrange(desc(num_wins)) |>
        slice(1:5)
      
      ggplotly(
        ggplot(data = rider_wins3, aes(x = reorder(winner_name, num_wins), 
                                       y = num_wins, 
                                       text = paste("TDF Wins: ", num_wins))) +
          geom_col(fill = "royalblue4", colour = "black") +
          coord_flip() +
          theme_minimal() +
          labs(title = "Most Wins (First Edition - 1974)",
               x = "Rider Name",
               y = "Number of TDF Wins"),
        tooltip = "text"
      )
    }
    else {
      rider_wins2 <- winners_year75 |> group_by(winner_name) |>
        summarise(num_wins = n()) |>
        arrange(desc(num_wins)) |>
        slice(1:8)
      
      ggplotly(
        ggplot(data = rider_wins2, aes(x = reorder(winner_name, num_wins), 
                                       y = num_wins, 
                                       text = paste("TDF Wins: ", num_wins))) +
          geom_col(fill = "royalblue3", colour = "black") +
          coord_flip() +
          theme_minimal() +
          labs(title = "Most Wins (1975 - present)",
               x = "Rider Name",
               y = "Number of TDF Wins"),
        tooltip = "text"
      )
    }
  })
  
  
  ## country win output
  output$country_plot <- renderPlotly({
    if (input$output_select == "Top-Performing Countries") {
      
      country_wins <- winners |> group_by(nationality) |>
        summarise(country_wins = n()) |>
        arrange(desc(country_wins)) |>
        slice(1:7)
      
      ggplotly(
        ggplot(data = country_wins, aes(x = reorder(nationality, country_wins), 
                                        y = country_wins,
                                        text = paste("TDF Wins: ", country_wins))) +
          geom_col(fill = "aquamarine3", colour = "black") +
          coord_flip() +
          theme_minimal() +
          labs(title = "Top-Performing Countries",
               x = "Country",
               y = "Number of TDF Wins"),
        tooltip = "text"
      )
    }
    else {
      country_wins <- winners |> group_by(nationality) |>
        summarise(country_wins = n()) |>
        arrange(desc(country_wins)) 
      
      ggplotly(
        ggplot(data = country_wins, aes(x = reorder(nationality, country_wins), 
                                        y = country_wins,
                                        text = paste("TDF Wins: ", country_wins))) +
          geom_col(fill = "cadetblue3", colour = "black") +
          coord_flip() +
          theme_minimal() +
          labs(title = "All Countries",
               x = "Country",
               y = "Number of TDF Wins"),
        tooltip = "text"
      )
    }
  })
  
  ## team win output
  output$team_plot <- renderPlotly({
    
    team_wins <- winners |> group_by(winner_team) |>
      summarise(team_wins = n()) |>
      arrange(desc(team_wins)) |>
      slice(1:11)
    
    ggplotly(
      ggplot(data = team_wins, aes(x = reorder(winner_team, team_wins), 
                                   y = team_wins,
                                   text = paste("TDF Wins: ", team_wins))) +
        geom_col(fill = "deepskyblue2", colour = "black") +
        coord_flip() +
        theme_minimal() +
        labs(title = "Teams: Most Wins",
             x = "Team Name",
             y = "Number of TDF Wins"),
      tooltip = "text"
    )
  })
  
  team_age <- winner_join |> group_by(winner_team) |>
    summarise(team_mean_age = mean(age))
  
  winner_age <- winners2000 |> group_by(winner_name, winner_team) |>
    summarise(winner_mean_age = mean(age))
  
  age_joined <- winner_age |> left_join(team_age, by = c("winner_team"))
  
  age_reactive <- reactive({
    age_joined |> filter(winner_team == input$team_sel)
  })
  
  output$age_plot <- renderPlotly({
    
    ggplotly(
      ggplot(data = age_reactive(), aes(x = reorder(winner_team, team_mean_age), 
                                        y = team_mean_age, 
                                        text = paste("Team Age: ", 
                                                     round(team_mean_age, 1)))) +
        geom_point(colour = "red3", size = 6) +
        geom_point(aes(x = winner_name, y = winner_mean_age, 
                       text = paste("Winner Age: ", round(winner_mean_age, 1))), 
                   colour = "yellow2", size = 6) +
        labs(x = "Name",
             y = "Age") +
        theme_classic(base_size = 15) +
        ylim(20, 35),
      tooltip = "text"
    )
  })
}

shinyApp(ui, server)

Shiny applications not supported in static R Markdown documents